#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache License
@file: clean_in_time.py
@time: 2018/1/22 11:58
"""

import sys

sys.path.append('..')
from common import util
from common.mongo import MongDb
from logger import Logger

# Connection settings passed to MongDb below.
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
MONGO_CONF = dict(
    host='172.16.215.16',
    port=40042,
    db='app_data',
    username='work',
    password='haizhi'
)

# Logger for this script, constructed with the name 'clean_in_time.log'.
log = Logger('clean_in_time.log').get_logger()

# Module-level database handle used by main() for both reading and writing.
source_db = MongDb(
    MONGO_CONF['host'],
    MONGO_CONF['port'],
    MONGO_CONF['db'],
    MONGO_CONF['username'],
    MONGO_CONF['password'],
    log=log
)


def main():
    """Rewrite ``_in_time`` for records whose current value is not a string.

    Iterates over the ``_id`` and ``_in_time`` fields returned by
    ``source_db.traverse_batch_field`` for the target table (empty filter).
    Records whose ``_in_time`` is already a string are skipped. For every
    other record a document with the same ``_id``, a fresh ``_in_time`` and
    ``_utime``, and ``is_new_gsxt = True`` is queued and passed to
    ``source_db.insert_batch_data`` in batches.

    NOTE(review): this relies on ``insert_batch_data`` updating existing
    documents that share an ``_id`` -- confirm against ``common.mongo``.
    NOTE(review): ``basestring`` exists only on Python 2.
    """
    table_name = 'enterprise_data_gov_new_2018_01_08'
    batch_size = 500     # records handed to insert_batch_data per call
    log_interval = 1000  # scanned records between two progress log lines

    pending = []
    count = 0  # records queued for rewriting
    total = 0  # records scanned

    for item in source_db.traverse_batch_field(table_name, {}, ['_id', '_in_time']):
        total += 1

        if not isinstance(item.get('_in_time'), basestring):
            # Take the timestamp once so both fields always carry the same
            # value, even if the clock ticks while the record is built.
            now = util.get_now_time()
            pending.append({
                '_id': item.get("_id"),
                '_in_time': now,
                '_utime': now,
                'is_new_gsxt': True,
            })
            count += 1

            if len(pending) >= batch_size:
                source_db.insert_batch_data(table_name, pending)
                # Clear in place so the same list object is reused.
                del pending[:]

        # Report progress for every scanned record, including skipped ones;
        # otherwise a run over mostly-clean data would log almost nothing.
        if total % log_interval == 0:
            log.info("当前进度: total = {} count = {}".format(total, count))

    # Flush whatever is left over from the last, partially filled batch.
    if pending:
        source_db.insert_batch_data(table_name, pending)
        del pending[:]

    log.info("当前进度: total = {} count = {}".format(total, count))
    log.info("完成清洗")


# Run the cleaning job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
